
## Citizen Type by Race 

library(Hmisc)
library(dplyr)
require(haven)
require(scales)
require(ggthemes)
require(readr)
require(gdata)
  # ANES 2012 time-series file (Stata format), read with haven.
data <- read_dta("anes_timeseries_2012_stata12.dta")

# Appended respondent-level file; `category` is turned into a factor, its
# first level is renamed, and the levels are put in the order 1, 4, 2, 3.
dat <- read_csv("anes_appended_anon.csv")
dat$category <- factor(dat$category)
#levels(dat$category)[3] <- "Unregistered"
levels(dat$category)[1] <- "Registered"
dat$category <- reorder.factor(dat$category, new.order = c(1, 4, 2, 3))

# ANES columns carried over into the analysis file.
myvars <- c(
  "caseid",
  "dem_raceeth_x",
  "dem_age_r_x",
  "gender_respondent_x",
  "prevote_prefprwho",
  "postvote_prefprwho",
  "presvote2012_x",
  "defsppr_self"
)

# Join the selected ANES columns onto the appended file by case id.
anes_cols <- match(myvars, names(data))
dat <- merge(data[, anes_cols], dat, by = "caseid")


# Weighted cross-tab of citizen type by race/ethnicity, keeping only rows
# with a positive race/ethnicity code.
# NOTE(review): presumably non-positive codes are missing-data codes - confirm.
ok <- dat$dem_raceeth_x > 0
race_counts <- xtabs(dat$weight_ftf[ok] ~ dat$category[ok] + dat$dem_raceeth_x[ok])
chisq.test(race_counts)

# Column percentages (each race/ethnicity column sums to 100).
# The calls are nested explicitly: `%>%` binds tighter than `*`, so
# `x %>% prop.table(2) * 100 %>% round` rounds only the constant 100 and
# leaves the table itself unrounded.
tab <- round(prop.table(race_counts, 2) * 100)
colnames(tab) <- c("White", "Black", "Hispanic", "Other")
library(xtable)
xtable(tab, digits = 0, caption = "Distribution of citizen types (percentages), by race and ethnicity, ANES 2012 face-to-face respondents (weighted). $\\chi^2 = 111$, $p < .01$.  $n$ = 2,006.", label = "tab:race")

# The chi-squared test is only meaningful on the weighted counts; `tab` now
# holds percentages, so test the count table instead.
chisq.test(race_counts)

## Race by category
# Same weighted cross-tab as above, this time reported as row percentages
# (each citizen-type row sums to 100).
ok <- dat$dem_raceeth_x > 0
tab <- xtabs(dat$weight_ftf[ok] ~ dat$category[ok] + dat$dem_raceeth_x[ok])
chisq.test(tab)

# Nested calls instead of `x %>% prop.table(1) * 100 %>% round`: `%>%` binds
# tighter than `*`, so the piped form never rounded the table.
tab <- round(prop.table(tab, 1) * 100)
colnames(tab) <- c("White", "Black", "Hispanic", "Other")
library(xtable)
# NOTE(review): caption and label are identical to the column-percentage
# table; a duplicated LaTeX label will clash if both tables are included.
xtable(tab, digits = 0, caption = "Distribution of citizen types (percentages), by race and ethnicity, ANES 2012 face-to-face respondents (weighted). $\\chi^2 = 111$, $p < .01$.  $n$ = 2,006.", label = "tab:race")



## Age, Wealth and Income


require(ggplot2)
require(ggthemes)
require(weights)
require(boot)
require(extrafont)
# Turn x axis into initials

# Four-colour palette passed to the manual colour/fill scales below.
category.color.scale <- c("#d7301f", "#fc8d59", "#fdcc8a", "#fef0d9")

# 0/1 indicators; a comparison against NA stays NA.
dat$contact   <- as.numeric(dat$cses_contct == 1)
dat$ownhome   <- as.numeric(dat$dem3_ownhome == 1)
dat$uninsured <- as.numeric(dat$health_insured != 1)
dat$obama     <- as.numeric(dat$pref_presrollup == 1)
dat$nonwhite  <- as.numeric(dat$dem_racecps_white != 1)
# Logical flag: address tenure below 1.
dat$shorttenure <- dat$dem3_lenaddr < 1

# Columns to summarise, their facet labels, and the category levels.
cols <- c("dem_age_r_x", "incgroup_prepost_x", "ownhome", "shorttenure")
labels <- c(
  "Median Age",
  "Median Income\n(in $1,000's)",
  "% Own Home",
  "% Recently\nMoved"
)
cats <- levels(dat$category)

# Build one row per (variable, category) with a weighted point estimate and
# its standard error.
#  * Variables with more than 5 distinct values are summarised by the
#    weighted median, with a bootstrap standard error (1000 resamples).
#  * All other variables keep the weighted mean / SE from wtd.t.test().

# Fixed seed so the bootstrap standard errors are reproducible across runs.
set.seed(2012)

plot.l <- vector("list", length(cols))
for (i in seq_along(cols)) {
  rows <- vector("list", length(cats))
  for (j in seq_along(cats)) {
    ok <- which(dat$category == cats[j])
    tmp <- wtd.t.test(dat[ok, cols[i]], weight = dat$weight_ftf[ok])
    if (length(unique(dat[ok, cols[i]])) > 5) {
      # Replace the mean by the weighted median.
      tmp$additional["Mean"] <- wtd.quantile(dat[ok, cols[i]],
                                             weights = dat$weight_ftf[ok],
                                             probs = .5)
      # Resample row indices with replacement (the original passed 1000 as
      # the `replace` argument, which R coerced to TRUE).
      ok.vec <- replicate(1000,
                          sample(ok, length(ok), replace = TRUE),
                          simplify = FALSE)
      boot.medians <- vapply(
        ok.vec,
        function(idx) {
          wtd.quantile(dat[idx, cols[i]],
                       weights = dat$weight_ftf[idx],
                       probs = .5)
        },
        numeric(1)
      )
      tmp$additional["Std. Err"] <- sd(boot.medians)
    }
    rows[[j]] <- data.frame(Group = cats[j],
                            Question = labels[i],
                            Mean = tmp$additional["Mean"],
                            SE = tmp$additional["Std. Err"])
  }
  # bind_rows() replaces rbind_all(), which has been removed from dplyr.
  plot.l[[i]] <- bind_rows(rows)
}
plot.l <- bind_rows(plot.l)
plot.l$Group <- as.factor(plot.l$Group)

# Reorder facets and groups by position in the (alphabetically sorted)
# levels; the gdata method is called explicitly rather than via dispatch.
plot.l$Question <- gdata::reorder.factor(as.factor(plot.l$Question),
                                         new.order = c(3, 4, 2, 1))

plot.l$Group <- gdata::reorder.factor(plot.l$Group, new.order = c(2, 4, 1, 3))
levels(plot.l$Group) <- c("Reg", "UR", "ML", "UL")

# The last two questions are proportions: rescale them to percentages.
# The first two (age, income) are left on their original scale.
is.share <- !(plot.l$Question %in% labels[1:2])
plot.l$Mean <- ifelse(is.share, 100 * plot.l$Mean, plot.l$Mean)
plot.l$SE <- ifelse(is.share, 100 * plot.l$SE, plot.l$SE)


# Point estimates with +/- 2 SE bars, one facet per question, written to TIFF.
p <- ggplot(plot.l, aes(x = Group, y = Mean)) +
  geom_errorbar(
    aes(ymin = Mean - 2 * SE, ymax = Mean + 2 * SE),
    size = .6,
    width = 0
  ) +
  geom_point(
    aes(fill = Group),
    stat = "identity",
    alpha = .7,
    colour = "black",
    shape = 21
  ) +
  #coord_flip() +
  facet_wrap(~Question, nrow = 1) +
  scale_x_discrete("") +
  scale_y_continuous("", limits = c(0, 100), expand = c(0, 0)) +
  scale_colour_manual("", values = category.color.scale) +
  scale_fill_manual(
    "",
    labels = levels(dat$category),
    values = category.color.scale
  ) +
  theme_bw() +
  theme(
    text = element_text(family = "Helvetica", color = "#586e75"),
    strip.background = element_rect(fill = "white", colour = gray(.75), size = .5),
    strip.text = element_text(colour = "black", face = "bold", size = 12),
    panel.border = element_rect(colour = gray(.75), size = .5),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key = element_blank()
  )
ggsave("age_inc_homeown_uninsured.tiff", plot = p, width = 6, height = 3, units = "in")

 # Weighted one-way ANOVAs of each demographic outcome on citizen category.
# `weights` is spelled out in full; `weight =` only worked through partial
# argument matching.
lm(incgroup_prepost_x ~ category, data = dat, weights = weight_ftf) %>% anova
lm(shorttenure ~ category, data = dat, weights = weight_ftf) %>% anova
lm(ownhome ~ category, data = dat, weights = weight_ftf) %>% anova
lm(dem_age_r_x ~ category, data = dat, weights = weight_ftf) %>% anova



## Politics


require(dplyr)
require(reshape2)
# 2008-based categories. Each ifelse() overrides the previous assignment
# where its condition is TRUE and yields NA where the condition is NA.
# TRUE/FALSE are written out because T and F are ordinary, reassignable
# variables.
dat$category08 <- ifelse(dat$namematch == FALSE, "Unlisted", NA)
dat$category08 <- ifelse(dat$namematch == TRUE & dat$respondent_reg == FALSE, "Unregistered", dat$category08)
dat$category08 <- ifelse(dat$respondent_reg == TRUE, "Registered", dat$category08)
dat$category08 <- ifelse(dat$e2008g == TRUE & dat$namematch == TRUE, "2008 Voter", dat$category08)
dat$category08 <- factor(dat$category08)
# Explicit gdata call: a bare reorder(..., new.order = ) only works while
# gdata is attached.
dat$category08 <- gdata::reorder.factor(dat$category08, new.order = c(1, 2, 4, 3))


# Three income groups with cut points at 32 and 72.
dat$incshort <- ifelse(dat$incgroup_prepost_x < 32, "Less than $32,000", NA)
dat$incshort <- ifelse(dat$incgroup_prepost_x >= 32 & dat$incgroup_prepost_x <= 72, "$32,000-$72,000", dat$incshort)
dat$incshort <- ifelse(dat$incgroup_prepost_x > 72, "$72,000+", dat$incshort)
dat$incshort <- factor(dat$incshort)
dat$incshort <- gdata::reorder.factor(dat$incshort, new.order = c(3, 1, 2))


# Campaign-contact indicators: any contact, and contact by each party.
dat$contact <- ifelse(dat$cses_contct == 1, 1, 0)
dat$dcontact <- ifelse(dat$cses_ptycont_dc == 1 | dat$cses_ptycont_dp == 1, 1, 0)
dat$rcontact <- ifelse(dat$cses_ptycont_rc == 1 | dat$cses_ptycont_rp == 1, 1, 0)

# Where the party-specific indicator is NA, fall back to the any-contact one.
dat$dcontact[is.na(dat$dcontact)] <- dat$contact[is.na(dat$dcontact)]
dat$rcontact[is.na(dat$rcontact)] <- dat$contact[is.na(dat$rcontact)]


# Attitude indicators.
dat$welfare <- ifelse(dat$fedspend_welfare == 1, 1, 0)
#dat$racialresentment <- ifelse(dat$resent_try == 1 | dat$resent_try == 2,1,0)
dat$satisfaction_govt <- ifelse(dat$cses_satisdem == 1 | dat$cses_satisdem == 2, 1, 0)
dat$obama <- ifelse(dat$pref_presrollup == 1, 1, 0)
# %in% never returns NA, so a missing answer is coded 0 for these two.
dat$libcon <- ifelse(dat$libcpo_self %in% c(5, 6, 7), 1, 0)
dat$police <- ifelse(dat$cses_exppolc %in% c(1, 2), 1, 0)
# Rescaled defence-spending item; negative results are set to NA.
dat$defense <- (dat$defsppr_self - 1) / 7
dat$defense[dat$defense < 0] <- NA
# (The second, identical assignment of dat$contact was redundant and is dropped.)


# Variables to summarise, their facet labels, and the category levels.
cols <- c("contact", "welfare", "obama")
labels <- c("Any Campaign\nContact", "Increase\nWelfare Spending", "Obama Support\n(Two-Party Vote)")
cats <- levels(dat$category)

# One row per (variable, category): weighted mean and standard error taken
# from wtd.t.test(), using only rows where the variable is not NA.
plot.l <- vector("list", length(cols))
for (i in seq_along(cols)) {
  rows <- vector("list", length(cats))
  for (j in seq_along(cats)) {
    ok <- dat$category == cats[j] & !is.na(dat[, cols[i]])
    tmp <- wtd.t.test(dat[ok, cols[i]], weight = dat$weight_ftf[ok])
    rows[[j]] <- data.frame(Group = cats[j],
                            Question = labels[i],
                            Mean = tmp$additional["Mean"],
                            SE = tmp$additional["Std. Err"])
  }
  # bind_rows() replaces rbind_all(), which has been removed from dplyr.
  plot.l[[i]] <- bind_rows(rows)
}
plot.l <- bind_rows(plot.l)
plot.l$Group <- as.factor(plot.l$Group)


# Reorder groups by position in the sorted levels, then abbreviate.
plot.l$Group <- gdata::reorder.factor(plot.l$Group, new.order = c(2, 4, 1, 3))
levels(plot.l$Group) <- c("Reg", "UR", "ML", "UL")


plot.l$Question <- factor(plot.l$Question)
#plot.l$Question <- reorder(plot.l$Question, new.order = c(1,2,3))



# Point estimates with +/- 2 SE bars, one facet per question (stacked in
# three rows with independent y axes), written to TIFF.
q <- ggplot(plot.l) +
  # `limits` is spelled out; `lim =` relied on partial argument matching.
  scale_y_continuous("", labels = percent_format(accuracy = 1), limits = c(0, NA), expand = c(0, 0)) +
  theme_bw() +
  geom_errorbar(aes(x = Group, y = Mean, ymin = Mean - 2 * SE, ymax = Mean + 2 * SE), size = .6, width = 0) +
  geom_point(aes(x = Group, y = Mean, fill = Group), stat = "identity", alpha = .7, colour = "black", shape = 21) +
  #coord_flip() +
  facet_wrap(~Question, nrow = 3, scales = "free_y") +
  theme(text = element_text(family = "Helvetica", color = "#586e75"),
        strip.background = element_rect(fill = "white", colour = gray(.75), size = .5),
        strip.text = element_text(colour = "black", face = "bold", size = 12),
        panel.border = element_rect(colour = gray(.15), size = .25),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = "none",
        legend.key = element_blank()) +
  scale_colour_manual("", values = category.color.scale, labels = levels(dat$category)) +
  scale_fill_manual("", values = category.color.scale, labels = levels(dat$category))
ggsave("contact_welfare_obama.tiff", plot = q, width = 4.6, height = 5, units = "in")



# Weighted mean of `contact` among rows in the "Registered" category where
# respondent_reg and e2008g are both true.
reg.2008 <- dat$category == "Registered" & dat$respondent_reg & dat$e2008g
wtd.mean(dat$contact[reg.2008], dat$weight_ftf[reg.2008])

# Same quantity for the "Mislisted" category.
mis.2008 <- dat$category == "Mislisted" & dat$respondent_reg & dat$e2008g
wtd.mean(dat$contact[mis.2008], dat$weight_ftf[mis.2008])

# Weighted category x party-ID table, with its chi-squared test.
tab <- xtabs(dat$weight_ftf ~ dat$category + dat$pid_self)
tab %>% chisq.test()

# Row percentages: proportions rounded to two decimals, then scaled by 100.
tab <- round(prop.table(tab, 1), 2) * 100
colnames(tab) <- c("Dem", "Ind", "Rep")
xtable(tab, digits = 0, caption = "Percent identifying with each of the two parties, or identifying as independent, by category. $p<.01$.", label = "tab:pid")



# Weighted median income among contacted and non-contacted rows.
was.contacted <- dat$contact == 1
not.contacted <- dat$contact == 0
wtd.quantile(dat$incgroup_prepost_x[was.contacted], dat$weight_ftf[was.contacted], probs = .5)
wtd.quantile(dat$incgroup_prepost_x[not.contacted], dat$weight_ftf[not.contacted], probs = .5)

# Weighted linear models of contact on income, without and with category.
anova(lm(contact ~ incgroup_prepost_x, data = dat, weights = weight_ftf))
anova(lm(contact ~ incgroup_prepost_x + category, data = dat, weights = weight_ftf))






## Residential Tenure and Income

require(reshape2)
require(scales)

# Residential-tenure bins. Intervals are closed on the right, so the boundary
# values 3, 5, 7 and 9 are kept: the previous strict `<` / `>` comparisons
# matched none of the bins for those values and left them NA. Every other
# value lands in the same bin as before. Placing 9 in "7-9" is what the
# "10+" label of the last bin implies.
# NOTE(review): non-positive values still fall in the first bin, as before -
# confirm dem3_lenaddr carries no negative missing-data codes.
dat$resten <- cut(dat$dem3_lenaddr,
                  breaks = c(-Inf, 3, 5, 7, 9, Inf),
                  labels = c("0-3", "3-5", "5-7", "7-9", "10+"),
                  right = TRUE)
tab <- xtabs(weight_ftf ~ resten + dem_raceeth_x, data = dat)
# NOTE(review): drops the first two race/ethnicity columns by position -
# presumably non-substantive codes; verify against the column names.
tab <- tab[, -c(1, 2)]
chisq.test(tab)
#round(prop.table(tab,2)*100)
#round(prop.table(tab,2)*100)

## income terciles
# Three income groups with cut points at 32 and 72.
dat$incshort <- ifelse(dat$incgroup_prepost_x < 32, "<$32,000", NA)
dat$incshort <- ifelse(dat$incgroup_prepost_x >= 32 & dat$incgroup_prepost_x <= 72, "$32-72,000", dat$incshort)
dat$incshort <- ifelse(dat$incgroup_prepost_x > 72, "$72,000+", dat$incshort)
# Levels are given by name. Reordering by position in the sorted levels is
# locale-dependent here, because "<" and "$" collate differently across
# platforms.
dat$incshort <- factor(dat$incshort,
                       levels = c("<$32,000", "$32-72,000", "$72,000+"))

# Share of each tenure bin falling in each category (rows sum to 1).
tab <- xtabs(weight_ftf ~ resten + category, data = dat)
#prop.table(tab,c(1,3))
plotData <- list()
plotData[[1]] <- melt(as.data.frame(prop.table(tab, 1)), id.vars = c("category", "Freq"))
plotData[[1]]$question <- "Residential Tenure"


# Share of each income group falling in each category (rows sum to 1).
tab <- xtabs(weight_ftf ~ incshort + category, data = dat)
#prop.table(tab,c(1,3))
plotData[[2]] <- melt(as.data.frame(prop.table(tab, 1)), id.vars = c("category", "Freq"))
plotData[[2]]$question <- "Income"
plotData <- bind_rows(plotData)
#plotData <- plotData %>% filter(category %in% c("Unlisted","Mislisted"))
# Explicit x-axis order (income groups, then tenure bins) instead of a
# position-based reorder of alphabetically sorted levels, which changes
# with the collation locale.
plotData$value <- factor(as.character(plotData$value),
                         levels = c("<$32,000", "$32-72,000", "$72,000+",
                                    "0-3", "3-5", "5-7", "7-9", "10+"))

# Stacked bars of category shares within each income group / tenure bin,
# one facet per question, written to TIFF.
r <- ggplot(
  plotData,
  aes(x = value, y = Freq, fill = category, group = category)
) +
  geom_bar(
    stat = "identity",
    alpha = .7,
    colour = gray(.15),
    size = .25
  ) +
  facet_grid(~ question, scales = "free_x") +
  #scale_y_continuous("% of Total", label = percent, limits = c(0,.3))  +
  scale_x_discrete("") +
  scale_y_continuous("Proportion of Category") +
  scale_fill_manual("", values = category.color.scale) +
  theme_bw(base_size = 10) +
  theme(
    text = element_text(family = "Helvetica", color = "#586e75"),
    strip.background = element_rect(fill = "white", colour = gray(.75), size = .5),
    strip.text = element_text(colour = "black", face = "bold", size = 12),
    panel.border = element_rect(colour = gray(.15), size = .25),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key = element_blank()
  )
ggsave("inc_restenure.tiff", plot = r, width = 4.6, height = 3, units = "in")








## Campaign Contact

require(dplyr)
require(reshape2)
# 2008-based categories. Each ifelse() overrides the previous assignment
# where its condition is TRUE and yields NA where the condition is NA.
# TRUE/FALSE are written out because T and F are ordinary, reassignable
# variables.
dat$category08 <- ifelse(dat$namematch == FALSE, "Unlisted", NA)
dat$category08 <- ifelse(dat$namematch == TRUE & dat$respondent_reg == FALSE, "Unregistered", dat$category08)
dat$category08 <- ifelse(dat$respondent_reg == TRUE, "Registered", dat$category08)
dat$category08 <- ifelse(dat$e2008g == TRUE & dat$namematch == TRUE, "2008 Voter", dat$category08)
dat$category08 <- factor(dat$category08)
# Explicit gdata call: a bare reorder(..., new.order = ) only works while
# gdata is attached.
dat$category08 <- gdata::reorder.factor(dat$category08, new.order = c(1, 2, 4, 3))


# Three income groups with cut points at 32 and 72.
dat$incshort <- ifelse(dat$incgroup_prepost_x < 32, "Less than $32,000", NA)
dat$incshort <- ifelse(dat$incgroup_prepost_x >= 32 & dat$incgroup_prepost_x <= 72, "$32,000-$72,000", dat$incshort)
dat$incshort <- ifelse(dat$incgroup_prepost_x > 72, "$72,000+", dat$incshort)
dat$incshort <- factor(dat$incshort)
dat$incshort <- gdata::reorder.factor(dat$incshort, new.order = c(3, 1, 2))


# Campaign-contact indicators: any contact, and contact by each party.
dat$contact <- ifelse(dat$cses_contct == 1, 1, 0)
dat$dcontact <- ifelse(dat$cses_ptycont_dc == 1 | dat$cses_ptycont_dp == 1, 1, 0)
dat$rcontact <- ifelse(dat$cses_ptycont_rc == 1 | dat$cses_ptycont_rp == 1, 1, 0)

# Where the party-specific indicator is NA, fall back to the any-contact one.
dat$dcontact[is.na(dat$dcontact)] <- dat$contact[is.na(dat$dcontact)]
dat$rcontact[is.na(dat$rcontact)] <- dat$contact[is.na(dat$rcontact)]

# Rows with both income and contact observed (the contact check was
# previously written twice).
ok <- !is.na(dat$incgroup_prepost_x) & !is.na(dat$contact)

dat.plot <- melt(dat[ok, ], id.vars = c("incshort", "category", "weight_ftf"), measure.vars = c("contact"))

# For each variable, `n` is the total weight of rows with a non-missing
# value; `pct` is each category's weighted sum of the value divided by n.
# NOTE(review): n is computed over all categories, so pct is a share of the
# whole sample rather than a within-category rate - confirm this matches the
# "Percent Reporting Contact" axis label used for the plot.
dat.plot <- dat.plot %>%
  group_by(variable) %>%
  mutate(n = sum(weight_ftf * as.numeric(!is.na(value)))) %>%
  group_by(variable, category) %>%
  summarise(pct = sum(weight_ftf * value, na.rm = TRUE) / n[1]) %>%
  # summarise() leaves the result grouped by `variable`; drop the grouping
  # before the columns are modified below.
  ungroup()


# Readable facet labels. Base sub() with fixed = TRUE is used because
# stringr, which provides str_replace(), is never attached by this script.
# The order matters: the specific names are replaced before plain "contact".
dat.plot$variable <- sub("dcontact", "Democratic\nContact", dat.plot$variable, fixed = TRUE)
dat.plot$variable <- sub("rcontact", "Republican\nContact", dat.plot$variable, fixed = TRUE)
dat.plot$variable <- sub("contact", "Any Campaign\nContact", dat.plot$variable, fixed = TRUE)

dat.plot$variable <- factor(dat.plot$variable)
levels(dat.plot$category) <- c("Reg", "UR", "ML", "UL")

# Dot plot of the contact share by category (displayed, not saved to file).
ggplot(
  dat.plot,
  aes(x = category, y = pct, colour = category)
) +
  #geom_point(size = 3)+
  geom_point(size = 7, alpha = .7) +
  #facet_grid(~incshort) +
  scale_x_discrete("") +
  scale_y_continuous("Percent Reporting Contact", labels = percent) +
  scale_colour_manual("", values = category.color.scale) +
  #scale_fill_manual(values=c("purple","blue","red")) +
  theme_bw() +
  theme(
    text = element_text(family = "Helvetica", color = "#586e75"),
    strip.background = element_rect(fill = "white", colour = gray(.75), size = .5),
    strip.text = element_text(colour = "black", face = "bold", size = 12),
    panel.border = element_rect(colour = gray(.15), size = .25),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key = element_blank()
  )


# Weighted median income among contacted and non-contacted rows.
was.contacted <- dat$contact == 1
not.contacted <- dat$contact == 0
wtd.quantile(dat$incgroup_prepost_x[was.contacted], dat$weight_ftf[was.contacted], probs = .5)
wtd.quantile(dat$incgroup_prepost_x[not.contacted], dat$weight_ftf[not.contacted], probs = .5)

# Weighted linear models of contact on income, without and with category.
anova(lm(contact ~ incgroup_prepost_x, data = dat, weights = weight_ftf))
anova(lm(contact ~ incgroup_prepost_x + category, data = dat, weights = weight_ftf))


## Politics    -----  This section not included in paper

# 0/1 attitude indicators. Those built with `==` are NA where the source
# item is NA; those built with %in% are 0 there.
dat$welfare <- as.numeric(dat$fedspend_welfare == 1)
#dat$racialresentment <- ifelse(dat$resent_try == 1 | dat$resent_try == 2,1,0)
dat$satisfaction_govt <- as.numeric(dat$cses_satisdem == 1 | dat$cses_satisdem == 2)
dat$obama <- as.numeric(dat$pref_presrollup == 1)
dat$libcon <- as.numeric(dat$libcpo_self %in% c(5, 6, 7))
dat$police <- as.numeric(dat$cses_exppolc %in% c(1, 2))
# Rescaled defence-spending item; negative results are set to NA.
dat$defense <- (dat$defsppr_self - 1) / 7
dat$defense[dat$defense < 0] <- NA
dat$contact <- as.numeric(dat$cses_contct == 1)


# Variables to summarise, their facet labels, and the category levels.
cols <- c("contact", "welfare", "obama")
labels <- c(
  "Any Campaign\nContact",
  "Increase\nWelfare Spending",
  "Obama Support\n(Two-Party Vote)"
)
cats <- levels(dat$category)

# One row per (variable, category): weighted mean and standard error taken
# from wtd.t.test(), using only rows where the variable is not NA.
plot.l <- vector("list", length(cols))
for (i in seq_along(cols)) {
  rows <- vector("list", length(cats))
  for (j in seq_along(cats)) {
    ok <- dat$category == cats[j] & !is.na(dat[, cols[i]])
    tmp <- wtd.t.test(dat[ok, cols[i]], weight = dat$weight_ftf[ok])
    rows[[j]] <- data.frame(Group = cats[j],
                            Question = labels[i],
                            Mean = tmp$additional["Mean"],
                            SE = tmp$additional["Std. Err"])
  }
  # bind_rows() replaces rbind_all(), which has been removed from dplyr.
  plot.l[[i]] <- bind_rows(rows)
}
plot.l <- bind_rows(plot.l)
plot.l$Group <- as.factor(plot.l$Group)


# Reorder groups by position in the sorted levels, then abbreviate.
plot.l$Group <- gdata::reorder.factor(plot.l$Group, new.order = c(2, 4, 1, 3))
levels(plot.l$Group) <- c("Reg", "UR", "ML", "UL")


plot.l$Question <- factor(plot.l$Question)
#plot.l$Question <- reorder(plot.l$Question, new.order = c(1,2,3))


# Write the politics figure to PDF.
# NOTE(review): the output path is user-specific; the directory must exist.
pdf("~/Box Sync/Unlisted/writeup/plots/contact_welfare_obama.pdf", width = 2.3, height = 6, family = "Helvetica")

# print() is explicit: a ggplot object is only auto-printed at the
# interactive top level, so under source() the PDF would otherwise be empty.
print(
  ggplot(plot.l) +
    # `limits` is spelled out; `lim =` relied on partial argument matching.
    scale_y_continuous("", labels = percent, limits = c(0, NA)) +
    theme_bw() +
    geom_errorbar(aes(x = Group, y = Mean, ymin = Mean - 2 * SE, ymax = Mean + 2 * SE), size = .6, width = 0) +
    geom_point(aes(x = Group, y = Mean, colour = Group), stat = "identity", size = 7, alpha = .7) +
    #coord_flip() +
    facet_wrap(~Question, nrow = 3, scales = "free_y") +
    theme(text = element_text(family = "Helvetica", color = "#586e75"),
          strip.background = element_rect(fill = "white", colour = gray(.75), size = .5),
          strip.text = element_text(colour = "black", face = "bold", size = 12),
          panel.border = element_rect(colour = gray(.15), size = .25),
          panel.grid.major.x = element_blank(),
          panel.grid.minor.x = element_blank(),
          legend.position = "none",
          legend.key = element_blank()) +
    scale_colour_manual("", values = category.color.scale, labels = levels(dat$category))
)

dev.off()

# Weighted mean of `contact` among rows in the "Registered" category where
# respondent_reg and e2008g are both true.
reg.2008 <- dat$category == "Registered" & dat$respondent_reg & dat$e2008g
wtd.mean(dat$contact[reg.2008], dat$weight_ftf[reg.2008])

# Same quantity for the "Mislisted" category.
mis.2008 <- dat$category == "Mislisted" & dat$respondent_reg & dat$e2008g
wtd.mean(dat$contact[mis.2008], dat$weight_ftf[mis.2008])

# Weighted category x party-ID table, with its chi-squared test.
tab <- xtabs(dat$weight_ftf ~ dat$category + dat$pid_self)
tab %>% chisq.test()

# Row percentages: proportions rounded to two decimals, then scaled by 100.
tab <- round(prop.table(tab, 1), 2) * 100
colnames(tab) <- c("Dem", "Ind", "Rep")
xtable(tab, digits = 0, caption = "Percent identifying with each of the two parties, or identifying as independent, by category. $p<.01$.", label = "tab:pid")
